1 Setup

library(rlang)
## Warning: package 'rlang' was built under R version 4.1.2
library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stats)
library(ggpubr)
## Loading required package: ggplot2
library(vegan)
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.1.2
## Loading required package: lattice
## This is vegan 2.5-7
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.6     ✓ purrr   0.3.4
## ✓ tidyr   1.2.0     ✓ forcats 0.5.1
## ✓ readr   2.0.2
## Warning: package 'tidyr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x purrr::%@%()         masks rlang::%@%()
## x purrr::as_function() masks rlang::as_function()
## x dplyr::filter()      masks stats::filter()
## x purrr::flatten()     masks rlang::flatten()
## x purrr::flatten_chr() masks rlang::flatten_chr()
## x purrr::flatten_dbl() masks rlang::flatten_dbl()
## x purrr::flatten_int() masks rlang::flatten_int()
## x purrr::flatten_lgl() masks rlang::flatten_lgl()
## x purrr::flatten_raw() masks rlang::flatten_raw()
## x purrr::invoke()      masks rlang::invoke()
## x dplyr::lag()         masks stats::lag()
## x purrr::splice()      masks rlang::splice()
#library(MCMC.OTU)
#install.packages("remotes")
#remotes::install_github("Jtrachsel/funfuns")
library("funfuns")

Read in data

#setwd('/Users/nicolakriefall/Google Drive (nicfall@bu.edu)/Moorea_revisions/mr16s_revised/analysis/03.community_comp')
setwd("/Volumes/GoogleDrive-104519233854090018057/My Drive/Moorea_revisions/moorea_holobiont_revised/mr16s_revised/03.community_comp")

samdf <- read.csv("mr16s_sampledata_plusneg copy.csv",header=TRUE)
load("taxa2 copy.Rdata")

#load("ps.clean.Rdata")
#load("ps.rare.Rdata")
load("ps.rare.trim.Rdata")
load("ps.trim.Rdata")

Rename ASVs to be more informative

tax <- as.data.frame(ps.rare.trim@tax_table@.Data)
## Loading required package: phyloseq
tax.clean <- data.frame(row.names = row.names(tax),
                        Kingdom = str_replace(tax[,1], "D_0__",""),
                        Phylum = str_replace(tax[,2], "D_1__",""),
                        Class = str_replace(tax[,3], "D_2__",""),
                        Order = str_replace(tax[,4], "D_3__",""),
                        Family = str_replace(tax[,5], "D_4__",""),
                        Genus = str_replace(tax[,6], "D_5__",""),
                        Species = str_replace(tax[,7], "D_6__",""),
                        stringsAsFactors = FALSE)
tax.clean[is.na(tax.clean)] <- ""

for (i in 1:7){ tax.clean[,i] <- as.character(tax.clean[,i])}
####### Fill holes in the tax table
tax.clean[is.na(tax.clean)] <- ""
for (i in 1:nrow(tax.clean)){
  if (tax.clean[i,2] == ""){
    kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
    tax.clean[i, 2:7] <- kingdom
  } else if (tax.clean[i,3] == ""){
    phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
    tax.clean[i, 3:7] <- phylum
  } else if (tax.clean[i,4] == ""){
    class <- paste("Class_", tax.clean[i,3], sep = "")
    tax.clean[i, 4:7] <- class
  } else if (tax.clean[i,5] == ""){
    order <- paste("Order_", tax.clean[i,4], sep = "")
    tax.clean[i, 5:7] <- order
  } else if (tax.clean[i,6] == ""){
    family <- paste("Family_", tax.clean[i,5], sep = "")
    tax.clean[i, 6:7] <- family
  } else if (tax.clean[i,7] == ""){
    tax.clean$Species[i] <- paste("Genus",tax.clean$Genus[i], sep = "_")
  }
}

tax_table(ps.rare.trim) <- as.matrix(tax.clean)

2 Core vs. accessory

2.1 Core

## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2020 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## Attaching package: 'microbiome'
## The following object is masked from 'package:vegan':
## 
##     diversity
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:base':
## 
##     transform
pseq.core <- core(ps.trim, detection = 0, prevalence = .7)
pseq.core <- core(ps.rare.trim, detection = 0, prevalence = .7)
pseq.core #9 taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 9 taxa and 84 samples ]
## sample_data() Sample Data:       [ 84 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 9 taxa by 7 taxonomic ranks ]
#saving
#core.tax <- data.frame(pseq.core@tax_table)
#write.csv(core.tax,"core.taxa.csv")

ps_glom <- tax_glom(pseq.core, "Genus")
ps0 <- transform_sample_counts(ps_glom, function(x) x / sum(x))
ps1 <- merge_samples(ps0, "site_zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps2 <- transform_sample_counts(ps1, function(x) x / sum(x))

plot_bar(ps2, fill="Genus")+
  geom_bar(stat="identity")+
  theme_cowplot()+
  scale_fill_brewer(palette="BrBG")

#ggsave(file="core.bar.pdf",width=8)

#not rel abun
plot_ordination(pseq.core,ordinate(pseq.core,"PCoA", "bray"),color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))#+

  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")

#rel abun
pseq.core.rel <- transform_sample_counts(pseq.core, function(x) x / sum(x))
plot_ordination(pseq.core.rel,ordinate(pseq.core.rel,"PCoA", "bray"),color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))#+

  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")

#by site
ps.core.mnw <- subset_samples(pseq.core,site=="MNW")
ps.core.mse <- subset_samples(pseq.core,site=="MSE")
ps.core.tnw <- subset_samples(pseq.core,site=="TNW")

plot_ordination(ps.core.mnw,ordinate(ps.core.mnw,"PCoA", "bray"),color="zone", shape="zone")+
  stat_ellipse()+
  theme_cowplot()#+

  #scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  #scale_shape_manual(name="Site",values=c(8,4,9))#+
  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")
# calculating core abundances #
core.sqs <- tax_table(pseq.core)
core.sqs.ids <- row.names(core.sqs)
core.sqs.ids
## [1] "sq1"  "sq2"  "sq4"  "sq7"  "sq8"  "sq10" "sq14" "sq21" "sq23"
ps.rare.trim.rel <- transform_sample_counts(ps.rare.trim, function(x) x / sum(x))
seq.rare.rel <- data.frame(otu_table(ps.rare.trim.rel))
tax.core <- tax_table(ps.rare.trim.rel)

seq.core <- seq.rare.rel %>% select(c(core.sqs.ids))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(core.sqs.ids)` instead of `core.sqs.ids` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
core.rel <- data.frame(colMeans(seq.core))

total.rel <- data.frame(colMeans(seq.rare.rel))
total.rel.ordered <- data.frame(total.rel[order(-total.rel$colMeans.seq.rare.rel.),,drop=FALSE])

2.1.1 Core stats

seq.core <- data.frame(otu_table(pseq.core))
seq.core <- data.frame(otu_table(pseq.core.rel))

dist.core <- vegdist(seq.core)
samdf.core <- data.frame(sample_data(pseq.core))
row.names(samdf.core)==row.names(seq.core)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.core,samdf.core$zone)
## Warning in betadisper(dist.core, samdf.core$zone): some squared distances are
## negative and changed to zero
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups     1 0.01878 0.018780  0.5182 0.4737
## Residuals 82 2.97183 0.036242
plot(bet.all) #very much overlap, not sig

bet.all <- betadisper(dist.core,samdf.core$site)
#anova(bet.all)
permutest(bet.all, pairwise = TRUE, permutations = 999) #MSE & TNW different again
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     2 0.1725 0.086250 2.2088    999  0.126
## Residuals 81 3.1628 0.039047                     
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##          MNW      MSE   TNW
## MNW          0.542000 0.199
## MSE 0.526739          0.055
## TNW 0.184599 0.048503
# Pairwise comparisons:
# (Observed p-value below diagonal, permuted p-value above diagonal)
#          MNW      MSE   TNW
# MNW          0.186000 0.383
# MSE 0.191245          0.029
# TNW 0.407966 0.036931      
plot(bet.all) 

adonis(seq.core ~ site, data=samdf.core, permutations=999)
## 
## Call:
## adonis(formula = seq.core ~ site, data = samdf.core, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## site       2    1.1252 0.56261  4.1991 0.09394  0.003 **
## Residuals 81   10.8526 0.13398         0.90606          
## Total     83   11.9779                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.core ~ site/zone, data=samdf.core, permutations=999)
## 
## Call:
## adonis(formula = seq.core ~ site/zone, data = samdf.core, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## site       2    1.1252 0.56261  4.6009 0.09394  0.003 **
## site:zone  3    1.3146 0.43818  3.5834 0.10975  0.008 **
## Residuals 78    9.5381 0.12228         0.79631          
## Total     83   11.9779                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.core ~ zone, strata=samdf.core$site,data=samdf.core, permutations=999)
## 
## Call:
## adonis(formula = seq.core ~ zone, data = samdf.core, permutations = 999,      strata = samdf.core$site) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)
## zone       1     0.064 0.064026 0.44068 0.00535  0.645
## Residuals 82    11.914 0.145291         0.99465       
## Total     83    11.978                  1.00000
pairwise.adonis(seq.core, factors = samdf.core$site, permutations = 999)
##        pairs  F.Model         R2 p.value p.adjusted
## 1 MNW vs MSE 3.335764 0.05718214   0.028      0.028
## 2 MNW vs TNW 2.095976 0.03804228   0.103      0.103
## 3 MSE vs TNW 7.113810 0.11640266   0.003      0.003
#MNW only marginally different than TNW surprisingly
#RELATIVE ABUNDANCE:
#        pairs  F.Model         R2 p.value p.adjusted
# 1 MNW vs MSE 3.573894 0.05804236   0.027      0.027 * 
# 2 MNW vs TNW 2.252231 0.03617912   0.096      0.096 .
# 3 MSE vs TNW 7.975852 0.11733361   0.004      0.004 **

#RAREFIED:
#MNW only marginally different than TNW surprisingly
#RELATIVE ABUNDANCE:
#        pairs  F.Model         R2 p.value p.adjusted
# 1 MNW vs MSE 3.404324 0.05828891   0.021      0.021 *
# 2 MNW vs TNW 2.306130 0.04169754   0.090      0.090 . 
# 3 MSE vs TNW 7.078557 0.11589267   0.004      0.004 **

2.1.2 Core stats by zone

#### MNW ####
# ps.core.mnw <- subset_samples(pseq.core,site=="MNW")
# ps.core.mse <- subset_samples(pseq.core,site=="MSE")
# ps.core.tnw <- subset_samples(pseq.core,site=="TNW")

ps.core.mnw <- subset_samples(pseq.core.rel,site=="MNW")
ps.core.mse <- subset_samples(pseq.core.rel,site=="MSE")
ps.core.tnw <- subset_samples(pseq.core.rel,site=="TNW")

seq.core.mnw <- data.frame(otu_table(ps.core.mnw))

dist.core.mnw <- vegdist(seq.core.mnw)
samdf.core.mnw <- data.frame(sample_data(ps.core.mnw))
row.names(samdf.core.mnw)==row.names(seq.core.mnw)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.core.mnw,samdf.core.mnw$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups     1 0.00212 0.002125  0.0461 0.8317
## Residuals 26 1.19876 0.046106
plot(bet.all) #very much overlap, not sig

adonis(seq.core.mnw ~ zone,data=samdf.core.mnw, permutations=999)
## 
## Call:
## adonis(formula = seq.core.mnw ~ zone, data = samdf.core.mnw,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.0385 0.038486  0.2685 0.01022  0.798
## Residuals 26    3.7268 0.143337         0.98978       
## Total     27    3.7653                  1.00000
#not sig
#same with rare or rel

#### MSE ####
seq.core.mse <- data.frame(otu_table(ps.core.mse))

dist.core.mse <- vegdist(seq.core.mse)
samdf.core.mse <- data.frame(sample_data(ps.core.mse))
row.names(samdf.core.mse)==row.names(seq.core.mse)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.core.mse,samdf.core.mse$zone)
## Warning in betadisper(dist.core.mse, samdf.core.mse$zone): some squared
## distances are negative and changed to zero
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value  Pr(>F)  
## Groups     1 0.08710 0.087100  2.9439 0.09766 .
## Residuals 27 0.79883 0.029586                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.all) #very much overlap, not sig

adonis(seq.core.mse ~ zone,data=samdf.core.mse, permutations=999)
## 
## Call:
## adonis(formula = seq.core.mse ~ zone, data = samdf.core.mse,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## zone       1    0.5289 0.52889  3.7109 0.12083  0.028 *
## Residuals 27    3.8481 0.14252         0.87917         
## Total     28    4.3770                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
# zone       1    0.6122 0.61216  3.9768 0.12838  0.014 *
# Residuals 27    4.1561 0.15393         0.87162         
# Total     28    4.7683                 1.00000         

#### TNW ####
seq.core.tnw <- data.frame(otu_table(ps.core.tnw))

dist.core.tnw <- vegdist(seq.core.tnw)
samdf.core.tnw <- data.frame(sample_data(ps.core.tnw))
row.names(samdf.core.tnw)==row.names(seq.core.tnw)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.core.tnw,samdf.core.tnw$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value   Pr(>F)   
## Groups     1 0.21246 0.212457  9.1454 0.005698 **
## Residuals 25 0.58078 0.023231                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.all) 

# Analysis of Variance Table
# 
# Response: Distances
#           Df Sum Sq Mean Sq F value  Pr(>F)  
# Groups     1 0.1582 0.15821  5.9475 0.02218 *
# Residuals 25 0.6650 0.02660                  

adonis(seq.core.tnw ~ zone,data=samdf.core.tnw, permutations=999)
## 
## Call:
## adonis(formula = seq.core.tnw ~ zone, data = samdf.core.tnw,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## zone       1   0.74717 0.74717  9.5148 0.27567  0.005 **
## Residuals 25   1.96318 0.07853         0.72433          
## Total     26   2.71035                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
# zone       1   0.89731 0.89731   11.12 0.30787  0.001 ***
# Residuals 25   2.01724 0.08069         0.69213           
# Total     26   2.91455                 1.00000           
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

2.2 Accessory

ps.rare.trim.otu <- data.frame(ps.trim@otu_table)
#ps.rare.trim.otu <- data.frame(ps.rare.trim@otu_table)
core.tax <- data.frame(pseq.core@tax_table)
core.ids <- c(rownames(core.tax))
ps.rare.trim.acc.otu <- ps.rare.trim.otu[,!colnames(ps.rare.trim.otu) %in% core.ids ]
row.names(samdf) <- samdf$id

#remake phyloseq object
ps.acc <- phyloseq(otu_table(ps.rare.trim.acc.otu, taxa_are_rows=FALSE), 
                         sample_data(samdf), 
                         tax_table(taxa2))
ps.acc #214 taxa accessory
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 214 taxa and 92 samples ]
## sample_data() Sample Data:       [ 92 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 214 taxa by 8 taxonomic ranks ]
plot_ordination(ps.acc,ordinate(ps.acc,"PCoA", "bray"),color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))#+

  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")
#ggsave(file="pcoa.acc.all.pdf")
#seq.acc <- data.frame(otu_table(ps.acc))
ps.acc.rel <- transform_sample_counts(ps.acc, function(x) x / sum(x))
seq.acc <- data.frame(otu_table(ps.acc.rel))

dist.acc <- vegdist(seq.acc)
samdf.acc <- data.frame(sample_data(ps.acc))
row.names(samdf.acc)==row.names(seq.acc)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE
bet.all <- betadisper(dist.acc,samdf.acc$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.003118 0.0031177  1.1698 0.2823
## Residuals 90 0.239871 0.0026652
plot(bet.all) #very much overlap, not sig

bet.all <- betadisper(dist.acc,samdf.acc$site)
#anova(bet.all)
permutest(bet.all, pairwise = TRUE, permutations = 999) #no diff in disp
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     2 0.0136 0.0067997 1.4789    999  0.206
## Residuals 89 0.4092 0.0045978                     
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##         MNW     MSE   TNW
## MNW         0.11800 0.679
## MSE 0.12735         0.221
## TNW 0.71445 0.23776
plot(bet.all) 

adonis(seq.acc ~ site, data=samdf.acc, permutations=999)
## 
## Call:
## adonis(formula = seq.acc ~ site, data = samdf.acc, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)    
## site       2     3.117 1.55830  4.2458 0.0871  0.001 ***
## Residuals 89    32.665 0.36703         0.9129           
## Total     91    35.782                 1.0000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.acc ~ site/zone, data=samdf.acc, permutations=999)
## 
## Call:
## adonis(formula = seq.acc ~ site/zone, data = samdf.acc, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2     3.117 1.55830  4.5057 0.08710  0.001 ***
## site:zone  3     2.922 0.97412  2.8166 0.08167  0.001 ***
## Residuals 86    29.743 0.34585         0.83123           
## Total     91    35.782                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.acc ~ zone, strata=samdf.acc$site,data=samdf.acc, permutations=999)
## 
## Call:
## adonis(formula = seq.acc ~ zone, data = samdf.acc, permutations = 999,      strata = samdf.acc$site) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## zone       1     0.791 0.79112  2.0348 0.02211  0.001 ***
## Residuals 90    34.991 0.38879         0.97789           
## Total     91    35.782                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise.adonis(seq.acc, factors = samdf.acc$site, permutations = 999) #p < 0.001***
##        pairs  F.Model         R2 p.value p.adjusted
## 1 MNW vs MSE 5.881318 0.09206633   0.001      0.001
## 2 MNW vs TNW 3.194025 0.05054315   0.001      0.001
## 3 MSE vs TNW 3.745959 0.05876386   0.001      0.001
#        pairs  F.Model         R2 p.value p.adjusted
# 1 MNW vs MSE 5.881318 0.09206633   0.001      0.001
# 2 MNW vs TNW 3.194025 0.05054315   0.001      0.001
# 3 MSE vs TNW 3.745959 0.05876386   0.001      0.001
#by site
ps.acc.mnw <- subset_samples(ps.acc,site=="MNW")
ps.acc.mse <- subset_samples(ps.acc,site=="MSE")
ps.acc.tnw <- subset_samples(ps.acc,site=="TNW")

gg.pcoa.acc.mnw <- plot_ordination(ps.acc.mnw,ordinate(ps.acc.mnw,"PCoA", "bray"),color="zone", shape="zone")+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))

gg.pcoa.acc.mse <- plot_ordination(ps.acc.mse,ordinate(ps.acc.mse,"PCoA", "bray"),color="zone", shape="zone")+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))

gg.pcoa.acc.tnw <- plot_ordination(ps.acc.tnw,ordinate(ps.acc.tnw,"PCoA", "bray"),color="zone", shape="zone")+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))

ggarrange(gg.pcoa.acc.mnw,gg.pcoa.acc.mse,gg.pcoa.acc.tnw,nrow=1,common.legend=T,legend="right")

#ggsave("pcoa.acc.zone.pdf",width=11,height=3)



#### by site - MNW ####
seq.acc.mnw <- data.frame(otu_table(ps.acc.mnw))

dist.acc.mnw <- vegdist(seq.acc.mnw)
samdf.acc.mnw <- data.frame(sample_data(ps.acc.mnw))
row.names(samdf.acc.mnw)==row.names(seq.acc.mnw)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.acc.mnw,samdf.acc.mnw$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.000030 0.0000300  0.0084 0.9274
## Residuals 28 0.099387 0.0035496
plot(bet.all) #very much overlap, not sig

adonis(seq.acc.mnw ~ zone,data=samdf.acc.mnw, permutations=999)
## 
## Call:
## adonis(formula = seq.acc.mnw ~ zone, data = samdf.acc.mnw, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## zone       1    0.8863 0.88634  2.3217 0.07657  0.001 ***
## Residuals 28   10.6893 0.38176         0.92343           
## Total     29   11.5756                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
# zone       1    0.7993 0.79929  2.1125 0.07514  0.002 **
# Residuals 26    9.8374 0.37836         0.92486          
# Total     27   10.6367                 1.00000          
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

#### by site - MSE ####
seq.acc.mse <- data.frame(otu_table(ps.acc.mse))

dist.acc.mse <- vegdist(seq.acc.mse)
samdf.acc.mse <- data.frame(sample_data(ps.acc.mse))
row.names(samdf.acc.mse)==row.names(seq.acc.mse)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.acc.mse,samdf.acc.mse$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq F value  Pr(>F)  
## Groups     1 0.026177 0.0261770  4.4124 0.04481 *
## Residuals 28 0.166112 0.0059326                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.all) 

# Df   Sum Sq   Mean Sq F value  Pr(>F)  
# Groups     1 0.021763 0.0217633  4.5724 0.04169 *
# Residuals 27 0.128513 0.0047597                  
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

adonis(seq.acc.mse ~ zone,data=samdf.acc.mse, permutations=999)
## 
## Call:
## adonis(formula = seq.acc.mse ~ zone, data = samdf.acc.mse, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## zone       1    0.8862 0.88620  2.4952 0.08182  0.001 ***
## Residuals 28    9.9446 0.35516         0.91818           
## Total     29   10.8308                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
# zone       1    0.8113 0.81126  2.2489 0.07689  0.004 **
# Residuals 27    9.7400 0.36074         0.92311          
# Total     28   10.5513                 1.00000 

#### by site - TNW ####
seq.acc.tnw <- data.frame(otu_table(ps.acc.tnw))

dist.acc.tnw <- vegdist(seq.acc.tnw)
samdf.acc.tnw <- data.frame(sample_data(ps.acc.tnw))
row.names(samdf.acc.tnw)==row.names(seq.acc.tnw)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
bet.all <- betadisper(dist.acc.tnw,samdf.acc.tnw$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.009059 0.0090586  1.7768 0.1926
## Residuals 30 0.152947 0.0050982
plot(bet.all) #ns                

adonis(seq.acc.tnw ~ zone,data=samdf.acc.tnw, permutations=999)
## 
## Call:
## adonis(formula = seq.acc.tnw ~ zone, data = samdf.acc.tnw, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## zone       1    0.8751 0.87511  2.3634 0.07303  0.001 ***
## Residuals 30   11.1084 0.37028         0.92697           
## Total     31   11.9835                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#  Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
# zone       1    0.8429 0.84287  2.2735 0.08336  0.002 **
# Residuals 25    9.2685 0.37074         0.91664          
# Total     26   10.1113                 1.00000          

3 Bar plots [needs fixing]

3.1 All, by Phylum

# ps.sz <- merge_samples(ps.rare.trim, "site_zone")
# ps.rel.sz <- transform_sample_counts(ps.sz, function(x) x / sum(x))
# plot_bar(ps.rel.sz, fill="Class")+
#   geom_bar(stat="identity")

ps.all.tab <- psmelt(ps.rare.trim)%>%
  filter(!is.na(Abundance))%>%
  group_by(site,zone,site_zone,Phylum,OTU)%>%
  summarize_at("Abundance",mean)

ps.all.tab$site[ps.all.tab$site == "MNW"] <- "Mo'orea NW"
ps.all.tab$site[ps.all.tab$site == "MSE"] <- "Mo'orea SE"
ps.all.tab$site[ps.all.tab$site == "TNW"] <- "Tahiti NW"

ps.all.tab$zone[ps.all.tab$zone == "Forereef"] <- "FR"
ps.all.tab$zone[ps.all.tab$zone == "Backreef"] <- "BR"

gg.bar.all.phy <- ggplot(ps.all.tab,aes(x=zone,y=Abundance,fill=Phylum))+
  geom_bar(stat="identity")+
  theme_cowplot()+
  #theme(legend.position="none")+
  xlab('Reef zone')+  
  facet_wrap(~site)

gg.bar.all.phy
#ggsave(gg.bar.all,file="bac.bar.all.pdf",height=4)
pa <- psmelt(ps.all)
tb <- psmelt(ps.all)%>%
  filter(!is.na(Abundance))%>%
  group_by(site,zone,site_zone,Class,OTU)%>%
  summarize_at("Abundance",mean)

tb$zone <- gsub("out","FR",tb$zone)
tb$zone <- gsub("in","BR",tb$zone)

tb$site <- gsub("MNW","Mo'orea NW",tb$site)
tb$site <- gsub("MSE","Mo'orea SE",tb$site)
tb$site <- gsub("TNW","Tahiti NW",tb$site)

quartz()
ggplot(tb,aes(x=zone,y=Abundance,fill=Class))+
  geom_bar(stat="identity")+
  theme_cowplot()+
  #theme(legend.position="none")+
  xlab('Reef zone')+  
  facet_wrap(~site)

3.2 All, by Class

ps.sz <- merge_samples(ps.rare.trim, "site_zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.rel.sz <- transform_sample_counts(ps.sz, function(x) x / sum(x))
ps.rel.sz@sam_data$site <- c("a","b","c","a","b","c")
ps.rel.sz@sam_data$zone <- c("a","b","c","a","b","c")
plot_bar(ps.rel.sz,fill="Class")+
  geom_bar(stat="identity")+
  facet_wrap(~site*zone)

ps.all.tab <- psmelt(ps.rare.trim)%>%
  filter(!is.na(Abundance))%>%
  group_by(site,zone,site_zone,Class,OTU)%>%
  summarize_at("Abundance",mean)

ps.all.tab$site[ps.all.tab$site == "MNW"] <- "Mo'orea NW"
ps.all.tab$site[ps.all.tab$site == "MSE"] <- "Mo'orea SE"
ps.all.tab$site[ps.all.tab$site == "TNW"] <- "Tahiti NW"

ps.all.tab$zone[ps.all.tab$zone == "Forereef"] <- "FR"
ps.all.tab$zone[ps.all.tab$zone == "Backreef"] <- "BR"

gg.bar.all.cla <- ggplot(ps.all.tab,aes(x=zone,y=Abundance,fill=Class))+
  geom_bar(stat="identity")+
  theme_cowplot()+
  #theme(legend.position="none")+
  xlab('Reef zone')+  
  facet_wrap(~site)

gg.bar.all.cla

#ggsave(gg.bar.all,file="bac.bar.all.pdf",height=4)

4 PCOA plots

4.1 Plots - site

4.1.1 Rarefied

ord <- ordinate(ps.rare.trim, "PCoA", "bray")
gg.pcoa.site.rare <- plot_ordination(ps.rare.trim, ord,color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))#+
  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")
gg.pcoa.site.rare

ggsave("gg.bac.all.rare.site.pdf",width=5)
## Saving 5 x 5 in image

Stats

Help on adonis (here)[https://thebiobucket.blogspot.com/2011/04/assumptions-for-permanova-with-adonis.html#more]

seq.rare <- data.frame(otu_table(ps.rare.trim))

dist.rare <- vegdist(seq.rare)
samdf.rare <- data.frame(sample_data(ps.rare.trim))
row.names(samdf.rare)==row.names(seq.rare)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.rare,samdf.rare$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00107 0.0010749  0.0514 0.8213
## Residuals 82 1.71631 0.0209306
plot(bet.all) #very much overlap, not sig

bet.all <- betadisper(dist.rare,samdf.rare$site)
permutest(bet.all, pairwise = TRUE, permutations = 999)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     2 0.10726 0.053628 2.3453    999  0.104
## Residuals 81 1.85220 0.022867                     
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##          MNW      MSE   TNW
## MNW          0.360000 0.221
## MSE 0.359986          0.050
## TNW 0.203563 0.049289
# Pairwise comparisons:
# (Observed p-value below diagonal, permuted p-value above diagonal)
#          MNW      MSE   TNW
# MNW          0.342000 0.215
# MSE 0.359986          0.045
# TNW 0.203563 0.049289     
plot(bet.all) #disp sig between MSE & TNW

adonis(seq.rare ~ site, data=samdf.rare, permutations=999)
## 
## Call:
## adonis(formula = seq.rare ~ site, data = samdf.rare, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2    1.4833 0.74165  4.2386 0.09474  0.001 ***
## Residuals 81   14.1730 0.17498         0.90526           
## Total     83   15.6563                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.rare ~ site/zone, data=samdf.rare, permutations=999)
## 
## Call:
## adonis(formula = seq.rare ~ site/zone, data = samdf.rare, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2    1.4833 0.74165  4.6756 0.09474  0.001 ***
## site:zone  3    1.8003 0.60011  3.7832 0.11499  0.001 ***
## Residuals 78   12.3727 0.15862         0.79027           
## Total     83   15.6563                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.rare ~ zone, strata=samdf.rare$site,data=samdf.rare, permutations=999)
## 
## Call:
## adonis(formula = seq.rare ~ zone, data = samdf.rare, permutations = 999,      strata = samdf.rare$site) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.2025 0.20253  1.0747 0.01294  0.291
## Residuals 82   15.4538 0.18846         0.98706       
## Total     83   15.6563                 1.00000
pairwise.adonis(seq.rare, factors = samdf.rare$site, permutations = 999)
##        pairs  F.Model         R2 p.value p.adjusted
## 1 MNW vs MSE 3.863638 0.06563709   0.004      0.004
## 2 MNW vs TNW 2.474825 0.04461168   0.037      0.037
## 3 MSE vs TNW 6.216217 0.10323162   0.001      0.001
#       pairs  F.Model         R2 p.value p.adjusted
# 1 MNW vs MSE 3.863638 0.06563709   0.005      0.005
# 2 MNW vs TNW 2.474825 0.04461168   0.034      0.034
# 3 MSE vs TNW 6.216217 0.10323162   0.001      0.001

4.1.2 Relative abundance

ps.trim.rel <- transform_sample_counts(ps.trim, function(x) x / sum(x))
ord.rel <- ordinate(ps.trim.rel, "PCoA", "bray")
gg.pcoa.site <- plot_ordination(ps.trim.rel, ord.rel,color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))+
  xlab("Axis 1 (44.1%)")+
  ylab("Axis 2 (23%)")+
  ggtitle("Relative abundance")
gg.pcoa.site

Stats

seq.trim <- data.frame(otu_table(ps.trim.rel))

dist.trim <- vegdist(seq.trim)
samdf.trim <- data.frame(sample_data(ps.trim))
row.names(samdf.trim)==row.names(seq.trim)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE
bet.all <- betadisper(dist.trim,samdf.trim$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00004 0.0000422  0.0017 0.9668
## Residuals 90 2.17370 0.0241522
#permutest(bet.all, pairwise = FALSE, permutations = 999)
plot(bet.all) #very much overlap, not sig

bet.all <- betadisper(dist.trim,samdf.trim$site)
#anova(bet.all)
permutest(bet.all, pairwise = TRUE, permutations = 999)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)  
## Groups     2 0.16261 0.081306 3.4989    999  0.038 *
## Residuals 89 2.06812 0.023237                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##          MNW      MSE   TNW
## MNW          0.232000 0.151
## MSE 0.226153          0.014
## TNW 0.151332 0.014949
plot(bet.all) #disp sig between MSE & TNW, same as rarefied

# Pairwise comparisons:
# (Observed p-value below diagonal, permuted p-value above diagonal)
#          MNW      MSE   TNW
# MNW          0.235000 0.164
# MSE 0.226153          0.019
# TNW 0.151332 0.014949      

adonis(seq.trim ~ site, data=samdf.trim, permutations=999)
## 
## Call:
## adonis(formula = seq.trim ~ site, data = samdf.trim, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## site       2    1.5696 0.78480  4.6469 0.09455  0.003 **
## Residuals 89   15.0308 0.16889         0.90545          
## Total     91   16.6004                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.trim ~ site/zone, data=samdf.trim, permutations=999)
## 
## Call:
## adonis(formula = seq.trim ~ site/zone, data = samdf.trim, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2    1.5696 0.78480  5.0583 0.09455  0.001 ***
## site:zone  3    1.6880 0.56267  3.6266 0.10168  0.002 ** 
## Residuals 86   13.3428 0.15515         0.80376           
## Total     91   16.6004                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.trim ~ zone, strata=samdf.trim$site,data=samdf.trim, permutations=999)
## 
## Call:
## adonis(formula = seq.trim ~ zone, data = samdf.trim, permutations = 999,      strata = samdf.trim$site) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)
## zone       1    0.2076 0.20757  1.1396 0.0125  0.273
## Residuals 90   16.3929 0.18214         0.9875       
## Total     91   16.6004                 1.0000
pairwise.adonis(seq.trim, factors = samdf.trim$site, permutations = 999)
##        pairs  F.Model         R2 p.value p.adjusted
## 1 MNW vs MSE 4.059614 0.06541475   0.004      0.004
## 2 MNW vs TNW 2.631393 0.04201397   0.034      0.034
## 3 MSE vs TNW 7.077436 0.10551142   0.001      0.001
#all significantly different
#        pairs  F.Model         R2 p.value p.adjusted
# 1 MNW vs MSE 4.059614 0.06541475   0.005      0.005
# 2 MNW vs TNW 2.631393 0.04201397   0.031      0.031
# 3 MSE vs TNW 7.077436 0.10551142   0.001      0.001

4.1.3 Plot - both

They look super similar

ggarrange(gg.pcoa.site.rare,gg.pcoa.site,labels="AUTO",common.legend=TRUE,legend="right")

4.2 Plots - reef zones

ps.mnw.rel <- subset_samples(ps.trim.rel,site=="MNW")
ps.mnw <- subset_samples(ps.rare.trim,site=="MNW")

ps.mse.rel <- subset_samples(ps.trim.rel,site=="MSE")
ps.mse <- subset_samples(ps.rare.trim,site=="MSE")

ps.tnw.rel <- subset_samples(ps.trim.rel,site=="TNW")
ps.tnw <- subset_samples(ps.rare.trim,site=="TNW")

4.2.1 Mo’orea NW

Relative abundance - appears total overlap

ord.mnw.rel <- ordinate(ps.mnw.rel, "PCoA", "bray")
gg.mnw.rel <- plot_ordination(ps.mnw.rel, ord.mnw.rel,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Mo'orea NW")+
  xlab("Axis 1 (38.8%)")+
  ylab("Axis 2 (29.8%)")+
  theme(axis.text=element_text(size=10))
gg.mnw.rel

Stats

seq.mnw.rel <- data.frame(otu_table(ps.mnw.rel))
samdf.mnw.rel <- data.frame(sample_data(ps.mnw.rel))
row.names(samdf.mnw.rel)==row.names(seq.mnw.rel)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mnw.rel <- vegdist(seq.mnw.rel)
bet.mnw.rel <- betadisper(dist.mnw.rel,samdf.mnw.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.mnw.rel) #not sig
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00407 0.0040681  0.1711 0.6823
## Residuals 28 0.66588 0.0237815
#permutest(bet.mnw.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mnw.rel)

adonis(seq.mnw.rel ~ zone, data=samdf.mnw.rel, permutations=999) #not sig
## 
## Call:
## adonis(formula = seq.mnw.rel ~ zone, data = samdf.mnw.rel, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.1165 0.11650 0.68244 0.02379  0.608
## Residuals 28    4.7800 0.17072         0.97621       
## Total     29    4.8965                 1.00000

Rarefied

Slightly less overlap - but maybe still non-significant?

ord.mnw <- ordinate(ps.mnw, "PCoA", "bray")
gg.mnw <- plot_ordination(ps.mnw, ord.mnw,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Mo'orea NW")+
  xlab("Axis 1 (38.9%)")+
  ylab("Axis 2 (30.4%)")+
  theme(axis.text=element_text(size=10))
gg.mnw

Stats

seq.mnw.rare <- data.frame(otu_table(ps.mnw))
samdf.mnw.rare <- data.frame(sample_data(ps.mnw))
row.names(samdf.mnw.rare)==row.names(seq.mnw.rare)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mnw.rare <- vegdist(seq.mnw.rare)
bet.mnw.rare <- betadisper(dist.mnw.rare,samdf.mnw.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.mnw.rare) #not sig
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00111 0.0011097   0.048 0.8284
## Residuals 26 0.60154 0.0231360
#permutest(bet.mnw.rare, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mnw.rare)

adonis(seq.mnw.rare ~ zone, data=samdf.mnw.rare, permutations=999) #not sig
## 
## Call:
## adonis(formula = seq.mnw.rare ~ zone, data = samdf.mnw.rare,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.1169 0.11692 0.66528 0.02495  0.605
## Residuals 26    4.5693 0.17574         0.97505       
## Total     27    4.6862                 1.00000

4.2.2 Mo’orea SE

Relative abundance - appears to separate more than MNW

ord.mse.rel <- ordinate(ps.mse.rel, "PCoA", "bray")
gg.mse.rel <- plot_ordination(ps.mse.rel, ord.mse.rel,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Mo'orea SE")+
  xlab("Axis 1 (39.6%)")+
  ylab("Axis 2 (27.2%)")+
  theme(axis.text=element_text(size=10))
gg.mse.rel

Stats

seq.mse.rel <- data.frame(otu_table(ps.mse.rel))
samdf.mse.rel <- data.frame(sample_data(ps.mse.rel))
row.names(samdf.mse.rel)==row.names(seq.mse.rel)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mse.rel <- vegdist(seq.mse.rel)
bet.mse.rel <- betadisper(dist.mse.rel,samdf.mse.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.mse.rel) #not sig
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00686 0.0068585  0.2188 0.6436
## Residuals 28 0.87784 0.0313514
#permutest(bet.mse.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mse.rel)

adonis(seq.mse.rel ~ zone, data=samdf.mse.rel, permutations=999) #p < 0.01**
## 
## Call:
## adonis(formula = seq.mse.rel ~ zone, data = samdf.mse.rel, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## zone       1    0.7797 0.77967   4.198 0.13038  0.007 **
## Residuals 28    5.2002 0.18572         0.86962          
## Total     29    5.9799                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rarefied

Equivalent

ord.mse <- ordinate(ps.mse, "PCoA", "bray")
gg.mse <- plot_ordination(ps.mse, ord.mse,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Mo'orea SE")+
  xlab("Axis 1 (38.1%)")+
  ylab("Axis 2 (27.8%)")+
  theme(axis.text=element_text(size=10))
gg.mse

Stats

Same story

seq.mse.rare <- data.frame(otu_table(ps.mse))
samdf.mse.rare <- data.frame(sample_data(ps.mse))
row.names(samdf.mse.rare)==row.names(seq.mse.rare)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mse.rare <- vegdist(seq.mse.rare)
bet.mse.rare <- betadisper(dist.mse.rare,samdf.mse.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.mse.rare) #not sig
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00799 0.0079868  0.2595 0.6146
## Residuals 27 0.83094 0.0307756
#permutest(bet.mse.rare, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mse.rare)

adonis(seq.mse.rare ~ zone, data=samdf.mse.rare, permutations=999) #p < 0.01**
## 
## Call:
## adonis(formula = seq.mse.rare ~ zone, data = samdf.mse.rare,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## zone       1    0.6971 0.69710  3.7137 0.12091  0.006 **
## Residuals 27    5.0681 0.18771         0.87909          
## Total     28    5.7652                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.2.3 Tahiti NW

Relative abundance - looks very silly

ord.tnw.rel <- ordinate(ps.tnw.rel, "PCoA", "bray")
gg.tnw.rel <- plot_ordination(ps.tnw.rel, ord.tnw.rel,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Tahiti NW")+
  #xlab("Axis 1 (60.2%)")+
  #ylab("Axis 2 (9.7%)")+
  theme(axis.text=element_text(size=10))
gg.tnw.rel
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

Stats

seq.tnw.rel <- data.frame(otu_table(ps.tnw.rel))
samdf.tnw.rel <- data.frame(sample_data(ps.tnw.rel))
row.names(samdf.tnw.rel)==row.names(seq.tnw.rel)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
dist.tnw.rel <- vegdist(seq.tnw.rel)
bet.tnw.rel <- betadisper(dist.tnw.rel,samdf.tnw.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.tnw.rel) #p < 0.05*
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq Mean Sq F value  Pr(>F)  
## Groups     1 0.19049 0.19049   7.243 0.01152 *
## Residuals 30 0.78899 0.02630                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#permutest(bet.tnw.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.tnw.rel)

adonis(seq.tnw.rel ~ zone, data=samdf.tnw.rel, permutations=999) #p < 0.01**
## 
## Call:
## adonis(formula = seq.tnw.rel ~ zone, data = samdf.tnw.rel, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)    
## zone       1    0.7918 0.79183  7.0644 0.1906  0.001 ***
## Residuals 30    3.3626 0.11209         0.8094           
## Total     31    4.1544                 1.0000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rarefied

Equivalent

ord.tnw <- ordinate(ps.tnw, "PCoA", "bray")
gg.tnw <- plot_ordination(ps.tnw, ord.tnw,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Tahiti NW")+
  xlab("Axis 1 (59.4%)")+
  ylab("Axis 2 (10.8%)")+
  theme(axis.text=element_text(size=10))
gg.tnw

Stats

seq.tnw.rare <- data.frame(otu_table(ps.tnw))
samdf.tnw.rare <- data.frame(sample_data(ps.tnw))
row.names(samdf.tnw.rare)==row.names(seq.tnw.rare)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.tnw.rare <- vegdist(seq.tnw.rare)
bet.tnw.rare <- betadisper(dist.tnw.rare,samdf.tnw.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.tnw.rare) #p < 0.01**
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value   Pr(>F)   
## Groups     1 0.20256 0.202560  8.2641 0.008141 **
## Residuals 25 0.61277 0.024511                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permutest(bet.tnw.rare, pairwise = FALSE, permutations = 999) #says the same thing
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)   
## Groups     1 0.20256 0.202560 8.2641    999  0.008 **
## Residuals 25 0.61277 0.024511                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.tnw.rare)

adonis(seq.tnw.rare ~ zone, data=samdf.tnw.rare, permutations=999) #p < 0.01**
## 
## Call:
## adonis(formula = seq.tnw.rare ~ zone, data = samdf.tnw.rare,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## zone       1    0.9863 0.98632   9.015 0.26503  0.001 ***
## Residuals 25    2.7352 0.10941         0.73497           
## Total     26    3.7215                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3 Summary

  • No differences in results between relative abundance & rarefied plots/stats
  • Pairwise adonis significant between all three sites
  • Adonis significant across reef zones at TNW & MSE, but not MNW
  • Beta dispersion not significantly different except between TNW F & B (can definitely see it)

Relative abundance

ggarrange(gg.mnw.rel,gg.mse.rel,gg.tnw.rel,nrow=1,common.legend=TRUE,legend="right",labels="AUTO")
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

Rarefied

ggarrange(gg.mnw,gg.mse,gg.tnw,nrow=1,common.legend=TRUE,legend="right",labels=c("(a)","(b)","(c)"))

#ggsave("16s.pcoa.pdf",height=2.5,width=8)

5 ANCOM

Tutorial here

5.1 Setup

library(readr)
library(tidyverse)
#library(dplyr)
library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
#install.packages('compositions')
library(compositions)
## Welcome to compositions, a package for compositional data analysis.
## Find an intro with "? compositions"
## 
## Attaching package: 'compositions'
## The following objects are masked from 'package:stats':
## 
##     anova, cor, cov, dist, var
## The following objects are masked from 'package:base':
## 
##     %*%, norm, scale, scale.default
source("ancom_v2.1.R")

#setwd("/Volumes/GoogleDrive/My Drive/Moorea_revisions/mr16s_revised/analysis/03.community_comp")
otu_data_unt <- data.frame(ps.trim@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure

meta_data = data.frame(ps.trim@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)

# Step 1: Data preprocessing

feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var, 
                                   out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info

# Step 2: ANCOM

main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = "~ 1 | site"

res.all = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method, 
            alpha, adj_formula, rand_formula)

#saveRDS(res.mnw,file="ancom.res.mnw.RDS")

# Step 3: Volcano Plot

# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")

# Annotation data
dat_ann = data.frame(x = min(res.all$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")

fig = res.all$fig +  
  geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") + 
  geom_text(data = dat_ann, aes(x = x, y = y, label = label), 
            size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig  

5.2 ANCOM by site

Note: shouldn’t be done on rarefied according to authors

Ran these three chunks once, then loading in data below

5.2.1 MNW

ps.mnw <- subset_samples(ps.trim,site=="MNW")

otu_data_unt <- data.frame(ps.mnw@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure

meta_data = data.frame(ps.mnw@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)

# Step 1: Data preprocessing

feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var, 
                                   out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info

# Step 2: ANCOM

main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL

res.mnw = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method, 
            alpha, adj_formula, rand_formula)

#saveRDS(res.mnw,file="ancom.res.mnw.RDS")

# Step 3: Volcano Plot

# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")

# Annotation data
dat_ann = data.frame(x = min(res.mnw$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")

fig = res.mnw$fig +  
  geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") + 
  geom_text(data = dat_ann, aes(x = x, y = y, label = label), 
            size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig  

5.2.2 MSE

ps.mse <- subset_samples(ps.trim,site=="MSE")

otu_data_unt <- data.frame(ps.mse@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure

meta_data = data.frame(ps.mse@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)

# Step 1: Data preprocessing

feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var, 
                                   out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info

# Step 2: ANCOM

main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL

res.mse = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method, 
            alpha, adj_formula, rand_formula)

#saveRDS(res.mse,file="ancom.res.mse.RDS")

# Step 3: Volcano Plot

# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")

# Annotation data
dat_ann = data.frame(x = min(res.mse$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")

fig = res.mse$fig +  
  geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") + 
  geom_text(data = dat_ann, aes(x = x, y = y, label = label), 
            size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig  

5.2.3 TNW

ps.tnw <- subset_samples(ps.trim,site=="TNW")

otu_data_unt <- data.frame(ps.tnw@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure

meta_data = data.frame(ps.tnw@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)

# Step 1: Data preprocessing

feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var, 
                                   out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info

# Step 2: ANCOM

main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL

res.tnw = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method, 
            alpha, adj_formula, rand_formula)

#saveRDS(res.tnw,file="ancom.res.tnw.RDS")

# Step 3: Volcano Plot

# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")

# Annotation data
dat_ann = data.frame(x = min(res.tnw$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")

fig = res.tnw$fig +  
  geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") + 
  geom_text(data = dat_ann, aes(x = x, y = y, label = label), 
            size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig  

5.3 Synthesizing ANCOM results

Re-read in data

res.mnw <- readRDS("ancom.res.mnw.RDS")
res.mse <- readRDS("ancom.res.mse.RDS")
res.tnw <- readRDS("ancom.res.tnw.RDS")

Which ones are ‘significant’

mnw.out <- res.mnw$out
mnw.out.sig <- mnw.out[mnw.out$detected_0.6==TRUE,]
#1

mse.out <- res.mse$out
mse.out.sig <- mse.out[mse.out$detected_0.6==TRUE,]
#8

tnw.out <- res.tnw$out
tnw.out.sig <- tnw.out[tnw.out$detected_0.6==TRUE,]
#4

Subset the sig ones in phyloseq

want.mnw <- c(mnw.out.sig$taxa_id)
want.mse <- c(mse.out.sig$taxa_id)
want.tnw <- c(tnw.out.sig$taxa_id)

want <- c(want.mnw,want.mse,want.tnw)

ps.sig.taxa <- subset_taxa(ps.rare.trim,row.names(ps.trim@tax_table) %in% want)
#37 & 13 are in there twice

#looks so cool!
plot_bar(ps.sig.taxa,x="site_zone",y="Abundance",fill="Genus")+
  facet_wrap(~Genus,scales="free")

#ggsave("sig.genus.abun.pdf",width=11)